library(tidyverse) # for data cleaning and plotting
library(googlesheets4) # for reading googlesheet data
library(lubridate) # for date manipulation
library(openintro) # for the abbr2state() function
library(palmerpenguins)# for Palmer penguin data
library(maps) # for map data
library(ggmap) # for mapping points on maps
library(gplots) # for col2hex() function
library(RColorBrewer) # for color palettes
library(sf) # for working with spatial data
library(leaflet) # for highly customizable mapping
library(ggthemes) # for more themes (including theme_map())
library(plotly) # for the ggplotly() - basic interactivity
library(gganimate) # for adding animation layers to ggplots
library(transformr) # for "tweening" (gganimate)
library(shiny) # for creating interactive apps
library(gifski)
library(ggridges)
library(ggimage)
# Skip Google authorization so the sheet can be read on every knit
# without an interactive prompt.
gs4_deauth()

# Make theme_minimal() the default theme for all ggplots below.
theme_set(theme_minimal())

# SNCF Train data
small_trains <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-02-26/small_trains.csv"
)

# Lisa's garden data; the date column is passed through ymd() after reading
garden_harvest <- read_sheet(
  "https://docs.google.com/spreadsheets/d/1DekSazCzKqPS2jnGhKue7tLxRU3GVL1oxi-4bEM5IWw/edit?usp=sharing"
) %>%
  mutate(date = ymd(date))

# Lisa's Mallorca cycling data; keep the first four columns plus speed
mallorca_bike_day7 <- read_csv(
  "https://www.dropbox.com/s/zc6jan4ltmjtvy0/mallorca_bike_day7.csv?dl=1"
) %>%
  select(1:4, speed)

# Heather Lendway's Ironman 70.3 Pan Am championships Panama data,
# one file per leg of the race
panama_swim <- read_csv(
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_swim_20160131.csv"
)
panama_bike <- read_csv(
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_bike_20160131.csv"
)
panama_run <- read_csv(
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_run_20160131.csv"
)

# COVID-19 data from the New York Times
covid19 <- read_csv(
  "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv"
)
NEW!! With animated graphs, add eval=FALSE to the code chunk that creates the animation and saves it using anim_save(). Add another code chunk to reread the gif back into the file. See the tutorial for help.
When you are finished with ALL the exercises, uncomment the options at the top so your document looks nicer. Don’t do it before then, or else you might miss some important warnings and messages.
# ggplotly() function.

# Number of harvest records per lettuce variety, drawn as horizontal bars
# ordered by count. The `text` aesthetic is not used by geom_col() itself; it
# is mapped so that ggplotly() can show the variety name in the tooltip.
variety_garden_graph <- garden_harvest %>%
  filter(vegetable == "lettuce") %>%
  # count() replaces group_by() + summarize(n = n()); the earlier arrange()
  # had no effect on the summarized result and was dropped.
  count(variety) %>%
  ggplot(aes(x = n, y = fct_reorder(variety, n), text = variety)) +
  geom_col(fill = "black", color = "yellow") +
  labs(
    title = "Harvest of Different Varieties of Lettuce",
    x = "Count",
    y = "Variety"
  )

# Interactive version; the tooltip shows the variety and the bar length.
ggplotly(variety_garden_graph, tooltip = c("text", "x"))
# Total harvested weight per vegetable, drawn as horizontal bars ordered by
# total weight. The `text` aesthetic is mapped only so that ggplotly() can show
# the vegetable name in the tooltip.
veggie_harvest_graph2 <- garden_harvest %>%
  group_by(vegetable) %>%
  # NOTE(review): 0.00220462 is presumably the grams-to-pounds factor (the
  # title reports lb) -- confirm that `weight` is recorded in grams.
  summarize(total_wt_lbs = sum(weight) * 0.00220462) %>%
  ggplot() +
  geom_col(
    aes(
      x = total_wt_lbs,
      y = fct_reorder(vegetable, total_wt_lbs),
      text = vegetable
    )
  ) +
  labs(
    title = "Total Harvest by vegetable (lb)",
    # The x axis is a weight, not a count, so label it accordingly.
    x = "Total weight (lb)",
    y = ""
  )

# Interactive version; the tooltip shows the vegetable and its total weight.
ggplotly(veggie_harvest_graph2, tooltip = c("text", "x"))
# small_trains dataset that contains data from the SNCF (National Society of
# French Railways). These are Tidy Tuesday data! Read more about it here.

# Keep rows with a known service that depart from PARIS EST or PARIS LYON.
# No grouping is applied: nothing below summarizes by group, so the result is
# left as a plain (ungrouped) tibble.
delay_small_trains <- small_trains %>%
  filter(
    !is.na(service),
    departure_station %in% c("PARIS EST", "PARIS LYON")
  )

# Ridge plot of average departure delay per service, with one animation state
# per value of `year`; {next_state} in the subtitle is filled in by gganimate.
delay3 <- delay_small_trains %>%
  ggplot(aes(x = avg_delay_all_departing, y = service)) +
  geom_density_ridges() +
  transition_states(year) +
  labs(
    title = "Departing Delay by French Train Services",
    subtitle = "Moving to {next_state}",
    x = "Average Delay for Departing Trains (Minutes)",
    y = "Train Service"
  )

# Render the animation and write it to disk.
anim_save("trains2.gif", animation = delay3)
# geom_area() examples here). You will look at cumulative harvest of tomato
# varieties over time. You should do the following:
# garden_harvest data, filter the data to the tomatoes and find the daily
# harvest in pounds for each variety.
# fct_reorder()) from most to least harvested (most on the bottom).

# Animated stacked area chart of cumulative tomato harvest per variety.
garden_harvest %>%
  filter(vegetable == "tomatoes") %>%
  # Add a row for every variety/day combination so days without a harvest
  # appear explicitly (with missing weight).
  complete(variety, date = seq.Date(min(date), max(date), by = "day")) %>%
  select(-c(vegetable, units)) %>%
  mutate(
    weight = replace_na(weight, 0),
    # Order varieties by total harvest, smallest first. position_stack()
    # stacks in reverse level order, so the most harvested variety (the last
    # level) ends up on the bottom.
    variety = fct_reorder(variety, weight, .fun = sum)
  ) %>%
  group_by(variety, date) %>%
  # NOTE(review): 0.00220462 is presumably the grams-to-pounds factor --
  # confirm that `weight` is recorded in grams.
  summarize(daily_harvest_lb = sum(weight) * 0.00220462) %>%
  # summarize() drops the `date` grouping, so the running total below is
  # computed within each variety.
  mutate(cumsum_daily_harvest_lb = cumsum(daily_harvest_lb)) %>%
  select(-daily_harvest_lb) %>%
  ggplot() +
  geom_area(
    aes(x = date, y = cumsum_daily_harvest_lb, fill = variety),
    position = position_stack()
  ) +
  transition_reveal(date) +
  labs(
    title = "Cumulative Harvest of Tomato Varieties over Time",
    subtitle = "Moving to {frame_along}",
    x = "Date",
    y = "Cumulative Daily Harvest (Lb)"
  )

# No animation is passed, so this saves the most recently rendered one.
anim_save("harvest1.gif")
# mallorca_bike_day7 bike ride using animation! Requirements:
# ggmap.
# ggimage package and geom_image to add a bike image instead of a red point.
# You can use this image. See here for an example.

bike_image_link <- "https://raw.githubusercontent.com/llendway/animation_and_interactivity/master/bike.png"

# Store the image URL on every row so geom_image() can map it as a column.
mallorca_bike_day7 <- mallorca_bike_day7 %>%
  mutate(image = bike_image_link)

# Terrain base map for the bounding box of the ride.
# NOTE(review): get_stamenmap() may no longer work in recent ggmap releases --
# confirm against the installed version.
mallorca_map <- get_stamenmap(
  bbox = c(left = 2.28, bottom = 39.41, right = 3.03, top = 39.8),
  maptype = "terrain",
  zoom = 11
)

# Route revealed along `time`: a small red point, the path colored by `ele`,
# and the bike image drawn at each position.
ggmap(mallorca_map) +
  geom_point(
    data = mallorca_bike_day7,
    aes(x = lon, y = lat),
    color = "red",
    size = .5
  ) +
  geom_path(
    data = mallorca_bike_day7,
    aes(x = lon, y = lat, color = ele),
    size = .5
  ) +
  geom_image(
    data = mallorca_bike_day7,
    # Use the `image` column created above instead of the global constant.
    aes(x = lon, y = lat, image = image),
    size = 0.075
  ) +
  scale_color_viridis_c(option = "magma") +
  labs(
    title = "Mallorca Bike Trail",
    subtitle = "Time: {frame_along}"
  ) +
  theme_map() +
  theme(legend.background = element_blank()) +
  transition_reveal(time)

# No animation is passed, so this saves the most recently rendered one.
anim_save("bike1.gif")
I personally prefer this animated map over the static map as there are details that we are able to observe on the animated map, such as the direction of the bike ride and the bike that represents Lisa on her bike. Lastly, doesn’t almost any animated plot look better than a static plot? At least in my opinion.
# panama_swim, panama_bike, and panama_run. Create a similar map to the one you
# created with my cycling data. You will need to make some small changes:
# 1. combine the files (HINT: bind_rows()),
# 2. make the leading dot a different color depending on the event (for an
#    extra challenge, make it a different image using `geom_image()`!),
# 3. CHALLENGE (optional): color by speed, which you will need to compute on
#    your own from the data.
# You can read Heather’s race report here. She is also in the Macalester
# Athletics Hall of Fame and still has records at the pool.

# Stack the three legs into one table (same row order as before: swim, run,
# bike).
total_trail <- bind_rows(panama_swim, panama_run, panama_bike)

# Terrain base map for the bounding box of the race.
# NOTE(review): get_stamenmap() may no longer work in recent ggmap releases --
# confirm against the installed version.
panama_map <- get_stamenmap(
  bbox = c(left = -79.56, bottom = 8.88, right = -79.41, top = 9.001),
  maptype = "terrain",
  zoom = 13
)

# Route revealed along `time`; point color and shape, and path color, are
# mapped to `event`.
ggmap(panama_map) +
  geom_point(
    data = total_trail,
    aes(x = lon, y = lat, color = event, shape = event),
    size = 2
  ) +
  geom_path(
    data = total_trail,
    aes(x = lon, y = lat, color = event),
    alpha = 0.8,
    size = 0.5
  ) +
  scale_color_viridis_d(option = "magma") +
  labs(
    title = "Ironman 70.3 Pan Am Championship",
    subtitle = "Time: {frame_along}"
  ) +
  theme_map() +
  theme(legend.background = element_blank()) +
  transition_reveal(time)

# No animation is passed, so this saves the most recently rendered one.
anim_save("panama.gif")
# lag() function you’ve used in a previous set of exercises). Replace missing
# values with 0’s using replace_na().
# geom_path() and add a group aesthetic. Put the x and y axis on the log scale
# and make the tick labels look nice - scales::comma is one option. This plot
# will look pretty ugly as is.
# geom_point()) and add the state name as a label (geom_text() - you should
# look at the check_overlap argument).
# animate() function to have 200 frames in your animation and make it 30
# seconds long.

# Per-state trajectory: cumulative cases (x) against cases added over the
# previous 7 rows in date order (y), both on log scales, revealed along `date`.
covid19trajectory_gganim <- covid19 %>%
  group_by(state) %>%
  # Cumulative count 7 rows earlier within the state; the first rows have no
  # predecessor and are set to 0.
  mutate(lag7 = lag(cases, 7, order_by = date)) %>%
  replace_na(list(lag7 = 0)) %>%
  mutate(new_cases_past_week = cases - lag7) %>%
  filter(cases >= 20) %>%
  ggplot(aes(x = cases, y = new_cases_past_week, group = state)) +
  geom_point(color = "red") +
  geom_path(color = "light blue") +
  geom_text(aes(label = state), check_overlap = TRUE) +
  scale_x_log10(labels = scales::comma) +
  scale_y_log10(labels = scales::comma) +
  labs(
    title = "Trajectory of US COVID-19 Confirmed Cases",
    subtitle = "Date: {frame_along}",
    x = "Total Confirmed Cases",
    y = "New Confirmed Cases (in the Past Week)"
  ) +
  theme(legend.position = "none") +
  transition_reveal(date)

# Render with 200 frames over 30 seconds.
animate(
  covid19trajectory_gganim,
  nframes = 200,
  duration = 30
)

# No animation is passed, so this saves the render produced just above.
anim_save("covid2.gif")
There is a lot to observe in this visualization as all states have been included. Once again, this plot very clearly shows the drastic surge in COVID-19 cases in New York and New Jersey at the start of the pandemic. We also observe how a state like Vermont does extremely well in dealing with COVID-19 as their new confirmed cases in a week drop quickly in June, then see an increase again at the end of June and fall again at the start of August. Furthermore, their total number of confirmed cases is extremely low, around 1,200 people during the entire pandemic. Additionally, we are also able to observe the rapid increase of cases in both Florida and Texas in July going through both August and September.
# 2018 state population estimates. The state column is split at its first
# separator into a throwaway piece ("dot") and the state name, which is then
# lower-cased so it can be used as a join / map key.
census_pop_est_2018 <- read_csv(
  "https://www.dropbox.com/s/6txwv3b4ng7pepe/us_census_2018_state_pop_est.csv?dl=1"
) %>%
  separate(state, into = c("dot", "state"), extra = "merge") %>%
  select(-dot) %>%
  mutate(state = str_to_lower(state))

# Cumulative cases per 10,000 residents for each state and date.
covid19_population <- covid19 %>%
  mutate(state = str_to_lower(state)) %>%
  left_join(census_pop_est_2018, by = "state") %>%
  group_by(state, est_pop_2018, date) %>%
  summarize(cumulative_cases = max(cases)) %>%
  mutate(cases_per_10000 = (cumulative_cases / est_pop_2018) * 10000)

states_map <- map_data("state")

# Choropleth of cases per 10,000, one animation state per Friday.
covid_map <- covid19_population %>%
  # Keep Fridays only. Comparing the numeric weekday (Monday = 1, so
  # Friday = 5) avoids depending on the locale-specific label "Fri".
  # `state` is already lower case, so it is not converted again.
  filter(wday(date, week_start = 1) == 5) %>%
  ggplot() +
  geom_map(
    map = states_map,
    aes(map_id = state, fill = cases_per_10000, group = date)
  ) +
  expand_limits(x = states_map$long, y = states_map$lat) +
  scale_fill_viridis_c() +
  labs(
    title = "Cumulative COVID-19 cases per 10,000 people in the United States",
    subtitle = "Moving to {next_state}"
  ) +
  # theme_map() is a complete theme, so the legend tweak has to come after it;
  # in the opposite order it would be overridden.
  theme_map() +
  theme(legend.background = element_blank()) +
  transition_states(date, transition_length = 0)

# Render a 30-second animation.
animate(covid_map, duration = 30)

# Save the render produced just above. Passing `covid_map` here instead would
# render it again with default settings and lose the 30-second duration.
anim_save("covid.gif")
We clearly see the sad reality of COVID-19 in the United States. It starts off with a rapidly rising number of cases per 10,000 people in New York, Louisiana, and Arizona. Shortly after, the whole country follows, with almost every state having more than 300 COVID-19 cases per 10,000 people except for a couple of states, such as Vermont, Maine, and Oregon. We ultimately see the United States light up in green and yellow, which means that almost every state has close to 200-300 cases per 10,000 people.
DID YOU REMEMBER TO UNCOMMENT THE OPTIONS AT THE TOP?